package com.xp.ai.filesummary;

import dev.langchain4j.data.document.DocumentParser;
import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
import dev.langchain4j.data.document.parser.apache.poi.ApachePoiDocumentParser;

/**
 * Static helper that selects a langchain4j {@link DocumentParser} based on a
 * file name's extension.
 *
 * @author xp
 */
public class DocumentParseUtil {

    /**
     * Returns a parser suited to the extension of the given file name.
     *
     * <p>The extension is the text after the last {@code '.'}; if the name
     * contains no dot, the whole string is treated as the extension, so a bare
     * {@code "pdf"} or {@code "docx"} is still accepted. Matching ignores case,
     * so {@code "REPORT.PDF"} and {@code "report.pdf"} behave the same.
     *
     * @param fileName file name (or bare extension) to inspect; may be {@code null}
     * @return an {@link ApachePoiDocumentParser} for {@code doc}/{@code docx},
     *         an {@link ApachePdfBoxDocumentParser} for {@code pdf}, or
     *         {@code null} when {@code fileName} is {@code null} or the
     *         extension is not supported
     */
    public static DocumentParser getParser(String fileName) {
        if (fileName == null) {
            // Treat a missing name like any other unsupported input instead of throwing an NPE.
            return null;
        }

        // lastIndexOf returns -1 when there is no dot, so substring(0) keeps the whole name.
        String extension = fileName.substring(fileName.lastIndexOf('.') + 1);

        if (extension.equalsIgnoreCase("doc") || extension.equalsIgnoreCase("docx")) {
            return new ApachePoiDocumentParser();
        }
        if (extension.equalsIgnoreCase("pdf")) {
            // NOTE(review): the boolean is presumably the "include metadata" flag - confirm against the langchain4j API.
            return new ApachePdfBoxDocumentParser(true);
        }

        // Unsupported extension: callers are expected to handle the null result.
        return null;
    }
}
